# -*- coding: utf-8 -*-
import subprocess
import re
import datetime
import ast
import os


binlog_file_name = 'mysql-bin.007756'



###使用mysqlbinlog命令对binlog进行分析，生成分析文件
cmd = '''
mysqlbinlog  %s --base64-output=decode-rows -v | egrep '### INSERT INTO|### UPDATE|### DELETE FROM|Xid =|^BEGIN|^COMMIT|ROLLBACK|Table_map:'
'''%(binlog_file_name)
#cmd = cmd.replace('')

# cmd = '''
# mysqlbinlog mysql-bin.000004 --base64-output=decode-rows -v | egrep '### INSERT INTO|### UPDATE|### DELETE FROM|Xid =|^BEGIN|^COMMIT|ROLLBACK|Table_map:'
# '''
# fdout = open("mysql_binlog_file_out.log",'w+')
# process = subprocess.Popen(cmd, shell=True, stdout=fdout)
# fdout.close()
with open("mysql_binlog_file_out.log",'w+') as f:
    output = subprocess.getstatusoutput(cmd)
    f.write(output[1])
    f.close()
##############超限定义#################
trans_dml_count = 1200
trans_exec_time = 2
trans_sql_exec_count = 100
######################################

#binglog 文件名
binglogfile_name=''
##mysqlbinlog解析后的文件名
parse_binglogfile_name='mysql_binlog_file_out.log'
#parse_binglogfile_name='mysqlbinlog.txt'
date1  = datetime.datetime.now()

###初始化 计数器 (汇总)
#汇总dic
file_info = {}
#处理文件行数
handle_file_count=0
#总事务
trans_count=0
#汇总 dml总数量
dml_total = {'INSERT': 0, 'UPDATE': 0, 'DELETE': 0, 'EXEC_COUNT': 0}


###正则匹配
#事务数
pattern_xid = re.compile(r'.+(Xid = ).+')
#事务开始
pattern_xid_start = re.compile(r'^BEGIN$')
#pattern_xid_start = re.compile(r'.+(Query)\s+(thread_id=).+')
#事务结束
# pattern_xid_end = re.compile(r'^(COMMIT|ROLLBACK)')
pattern_xid_end = re.compile(r'.+(Xid = ).+')
pattern_xid_end_commit =  re.compile(r'^COMMIT')
pattern_xid_end_rollback =  re.compile(r'^ROLLBACK')
##table_map
pattern_table_map =  re.compile(r'.+(Table_map:).+')
#DML
  ##update
pattern_dml_update =  re.compile(r'^(### UPDATE)')
  ##delete
pattern_dml_delete =  re.compile(r'^(### DELETE)')
  ##insert
pattern_dml_insert =  re.compile(r'^(### INSERT INTO)')

command_begin = '''
awk '/^BEGIN/{printf "%d,",NR} END{print ""}'  parse_binglogfile_name | sed 's/,$//; s/.*/[&]/' 
'''
command_begin = command_begin.replace('parse_binglogfile_name',parse_binglogfile_name)
command_commit = '''
awk '/^COMMIT|^ROLLBACK/{printf "%d,",NR} END{print ""}'  parse_binglogfile_name | sed 's/,$//; s/.*/[&]/'
'''
command_commit = command_commit.replace('parse_binglogfile_name',parse_binglogfile_name)
p = subprocess.getstatusoutput(command_begin)
trans_start_list = ast.literal_eval(p[1])
# print(trans_start_list)
p = subprocess.getstatusoutput(command_commit)
trans_stop_list = ast.literal_eval(p[1])
# print(trans_start_list)
# exit(1)
if trans_stop_list[0] < trans_start_list[0]:
    del trans_stop_list[0]
if trans_stop_list[-1] < trans_start_list[-1]:
    del trans_start_list[-1]
with open(parse_binglogfile_name, 'r', encoding='utf-8') as f:
    file_content_list = f.readlines()
    forbreak_flag = 0
    limit_trans_count = 0
    for i in range(len(trans_start_list)):
        ##事务总数
        trans_count = trans_count + 1
        # forbreak_flag = forbreak_flag + 1
        # if  forbreak_flag >1 :
        #     break
        file_trans_content = file_content_list[(trans_start_list[i] -1):(trans_stop_list[i])]
        ##明细
        tx_dml_stats = {}
        #本次 dml总数量
        tx_dml_total = {'INSERT': 0, 'UPDATE': 0, 'DELETE': 0, 'DML_TOTAL': 0, 'EXEC_COUNT': 0}
        ##xid 时间
        start_time = None
        for trans_line in file_trans_content:
            if pattern_table_map.search(trans_line):
                table_name = trans_line.split('Table_map:')[1].split()[0]
                dml_total['EXEC_COUNT'] += 1
                tx_dml_total['EXEC_COUNT'] += 1
                tx_dml_stats[table_name] = tx_dml_stats.get(table_name,
                                                            {'INSERT': 0, 'UPDATE': 0, 'DELETE': 0, 'EXEC_COUNT': 0})
                if table_name in tx_dml_stats:
                    tx_dml_stats[table_name]['EXEC_COUNT'] += 1
                if start_time is None:
                    start_time = trans_line.split()[1]
                continue;
            if pattern_dml_update.search(trans_line):
                tx_dml_stats[table_name]['UPDATE'] += 1
                ##汇总计数
                dml_total['UPDATE'] += 1
                ##本次事务计数
                tx_dml_total['UPDATE'] += 1
                tx_dml_total['DML_TOTAL'] += 1
                continue;
            if pattern_dml_delete.search(trans_line):
                tx_dml_stats[table_name]['DELETE'] += 1
                ##汇总计数
                dml_total['DELETE'] += 1
                ##本次事务计数
                tx_dml_total['DELETE'] += 1
                tx_dml_total['DML_TOTAL'] += 1
                continue;
            if pattern_dml_insert.search(trans_line):
                tx_dml_stats[table_name]['INSERT'] += 1
                ##汇总计数
                dml_total['INSERT'] += 1
                ##本次事务计数
                tx_dml_total['INSERT'] += 1
                tx_dml_total['DML_TOTAL'] += 1
                continue;
            if pattern_xid.search(trans_line):
                end_time = trans_line.split()[1]
                xid = trans_line.split('Xid =')[1].replace('\n', '').strip()
                continue;
            if pattern_xid_end_commit.search(trans_line):
                ##事务commit
                tx_dml_total['Xid_STATUS'] = 'COMMIT'
                continue;
            if pattern_xid_end_rollback.search(trans_line):
                ##事务rollback
                tx_dml_total['Xid_STATUS'] = 'ROLLBACK'
                continue;
##########打印区################
        ##事务明细
        # 事务时间
        start_time1 = datetime.datetime.strptime(start_time, "%H:%M:%S")
        end_time1 = datetime.datetime.strptime(end_time, "%H:%M:%S")
        time = (end_time1 - start_time1).seconds
        if tx_dml_total['DML_TOTAL'] >= trans_dml_count or time >= trans_exec_time or tx_dml_total['EXEC_COUNT'] >=  trans_sql_exec_count:
            limit_trans_count += 1
            # 打印
            print('\n','xid_info:',xid ,' start_time:',start_time,' time:',str(time)+'s',' dml_info:',tx_dml_total)
            print('-' * 13)
            for key, value in tx_dml_stats.items():
                print("{%s: %s}" % (key, value))

    ##超限汇总
    limit_content = '''
    共有超限事务%s个
    -----------超限规则--------------
    # 事务中dml数量      > %s
    # 事务执行时间       >  %s
    # 事务中sql执行数量   > %s
    '''%(limit_trans_count,trans_dml_count,trans_exec_time,trans_exec_time)
    print(limit_content)
    date3 = datetime.datetime.now()
    time = (date3 - date1).seconds
    time = str(time) + 's'
    file_info['parse_fileName'] = parse_binglogfile_name
    #file_info['handle_file_count'] = handle_file_count
    file_info['trans_count'] = trans_count
    file_info['time'] = time
    file_info.update(dml_total)
    print('\n','Total')
    print(file_info)
    os.remove(parse_binglogfile_name)

